% Multivariate case:
% preprocess the data into a cell array with columns {label}, {lead 0}, {lead 1}, {timeline},
% then reshuffle.
% Source folders (one per class) and the project root that receives results.
data_path_class_1 = '/Users/liuyiyao/Documents/UB_study/Research/Experiments/early_classification/data/ecg/abnormal';
data_path_class_2 = '/Users/liuyiyao/Documents/UB_study/Research/Experiments/early_classification/data/ecg/normal';
dir_parent = '/Users/liuyiyao/Documents/UB_study/Research/Experiments/early_classification/';
dataset_name = 'ecg';
%% add into one dataset
% Seed the generator up front so the random train/test split is repeatable.
rng(6);

% One row per recording: {label, lead 0, lead 1, timeline}.
% Class 1 rows are appended first, class 2 rows after them.
data_cell = cell(0, 4);
class_paths = {data_path_class_1, data_path_class_2};
class_counts = zeros(1, numel(class_paths));

for class_id = 1:numel(class_paths)
    class_dir = class_paths{class_id};
    file_list = dir(class_dir);
    % Keep regular files only. Filtering on isdir removes '.' and '..'
    % wherever they appear in the listing, instead of assuming they are
    % always the first two entries.
    file_list = file_list(~[file_list.isdir]);

    for i = 1:numel(file_list)
        file_name = file_list(i).name;
        split_name = split(file_name, '.');
        if numel(split_name) < 2
            % No '.' in the name, so there is no lead tag to read.
            continue;
        end

        % The second dot-separated token selects the lead: '0' or '1'.
        lead_tag = split_name{2};
        is_lead_0 = strcmp(lead_tag, '0');
        is_lead_1 = strcmp(lead_tag, '1');
        if ~(is_lead_0 || is_lead_1)
            % Unrelated file (e.g. '.DS_Store'): skip it rather than
            % re-using the data loaded from the previous file.
            continue;
        end
        if is_lead_1 && class_counts(class_id) == 0
            % A lead-1 file must follow the lead-0 file that opened its
            % row; otherwise it would be attached to another class's row.
            warning('Skipping %s: lead-1 file without a preceding lead-0 file.', file_name);
            continue;
        end

        data_load = load(fullfile(class_dir, file_name));
        if is_lead_0
            % A lead-0 file starts a new recording row.
            data_cell{end+1, 2} = data_load(:,2); %#ok<SAGROW>
            class_counts(class_id) = class_counts(class_id) + 1;
        else
            % A lead-1 file completes the most recently opened row.
            data_cell{end, 3} = data_load(:,2);
        end
        % Column 1 of the loaded file is stored as the timeline, column 2
        % as the lead signal.
        data_cell{end, 1} = class_id;
        data_cell{end, 4} = data_load(:,1);
    end
end

% Number of class-1 rows; rows num_class_1+1:end belong to class 2.
num_class_1 = class_counts(1);

%% draw a fixed number of training rows per class; the remainder is TEST
num_train_each_class = 30;
num_total = size(data_cell, 1);

% Class 1 occupies rows 1:num_class_1, class 2 the rows after that.
train_class_1_index = randsample(1:num_class_1, num_train_each_class);
train_class_2_index = randsample((num_class_1 + 1):num_total, num_train_each_class);
train_index = [train_class_1_index, train_class_2_index];

% Logical row mask: true for rows picked for training. Masking keeps the
% original row order, so TRAIN lists its class-1 rows before its class-2 rows.
is_train = ismember(1:num_total, train_index);
TRAIN = data_cell(is_train, :);
TEST = data_cell(~is_train, :);

%% sample subsequence
num_class = 2;

% Row ranges of each class inside TRAIN. TRAIN is taken from data_cell with
% a logical mask, so its first num_train_each_class rows are class 1 and the
% next num_train_each_class rows are class 2. The class-2 range must start
% at num_train_each_class + 1; starting at num_train_each_class would put
% the last class-1 row into class 2 as well.
class_index_cell = cell(num_class, 1);
class_index_cell{1,1} = 1:num_train_each_class;
class_index_cell{2,1} = (num_train_each_class + 1):(num_train_each_class * 2);

% Candidate subsequence lengths handed to the sampler.
length_vec = [5,7,10,13,15,17,20,23,25,27,30];
num_variable = 2;

% NOTE(review): the meaning of the literal arguments 30 and 20 is defined by
% multivar_subsequence_sample, which is not visible here -- confirm there.
[ subsequence_cell ] = multivar_subsequence_sample( TRAIN, 30, 20, length_vec, class_index_cell, num_variable);

%% keep only the sampled subsequences accepted as candidate shapelets
candidate_shapelet_cell = {};
ind = 1;   % next free row in candidate_shapelet_cell
for i = 1:size(subsequence_cell, 1)
    candidate = subsequence_cell(i, :);

    % Multivariate acceptance test with threshold 0.75. A single-variable
    % variant, is_candidate_shapelet(TRAIN, candidate{1}, 0.85, candidate{2}),
    % was used here previously and is disabled.
    % NOTE(review): the original line was tagged "beef" -- presumably the
    % threshold was carried over from another dataset; confirm.
    status = is_candidate_shapelet_multivar(TRAIN, candidate{1}, 0.75, ...
        candidate{2}, candidate{4}, num_class);

    if status == 1
        % Copy the first four columns of the accepted subsequence row.
        candidate_shapelet_cell(ind, 1:4) = candidate(1:4);
        ind = ind + 1;
    end
end
%% reshuffle TRAIN
% Randomly permute the training rows so the classes are interleaved.
shuffled_order = randperm(size(TRAIN, 1));
TRAIN = TRAIN(shuffled_order, :);

%% map every sequence onto the candidate shapelets
% Each row stores the two outputs of train_seq2shapelet_multivar; the second
% output is used later as the time index for label generation.
num_train = size(TRAIN, 1);
train_distance_data = cell(num_train, 2);
for i = 1:num_train
    [seq_out_1, seq_out_2] = train_seq2shapelet_multivar(TRAIN(i,:), candidate_shapelet_cell);
    train_distance_data(i, :) = {seq_out_1, seq_out_2};
end

% Same transformation for the held-out rows.
num_test = size(TEST, 1);
test_distance_data = cell(num_test, 2);
for i = 1:num_test
    [seq_out_1, seq_out_2] = train_seq2shapelet_multivar(TEST(i,:), candidate_shapelet_cell);
    test_distance_data(i, :) = {seq_out_1, seq_out_2};
end
%% generate training data_label
% The two class labels are fixed for this dataset (deriving them from TRAIN
% was tried earlier and is disabled).
class_label = [1; 2];
num_train = size(TRAIN, 1);
for i = 1:num_train
    % Column 2 of train_distance_data holds the time index of sequence i;
    % its row count is passed to label_generation as the number of time steps.
    time_index = train_distance_data{i,2};
    num_time = size(time_index, 1);
    train_distance_data{i, 3} = label_generation(TRAIN{i,1}, class_label, num_time, time_index);
end

%% save the candidate shapelets
save_path = strcat(dir_parent, 'result/', dataset_name, '_new_version');
% Create the output folder only when it is missing, so re-running the script
% does not raise the "directory already exists" warning from mkdir.
if ~exist(save_path, 'dir')
    mkdir(save_path);
end
save(strcat(save_path, '/shapelet.mat'), 'candidate_shapelet_cell');









